# coding: utf-8
import pandas as pd
import numpy as np
import os
from copy import deepcopy, copy
from joblib import load, dump
import cvxpy as cvx

####################################################################################
####################################################################################
############################# AREA OF INPUT PARAMETERS #############################
####################################################################################
####################################################################################

##### Environment Parameters
PATH = "." # Root directory; should be the directory in which the project's "README.md" is located
PATH_DATA = f"{PATH}/data" # Directory the input data is read from
PATH_MODELS = f"{PATH}/models" # Directory the conversion model is read from and the results are written to

##### Parameters for Bandits
size_norm = 50000 # T, use 50000 to reproduce the results; the data set size is divided by this to get the scaling factor
budget = 1600 # Budget constraint; to reproduce the results, run with 1600 and 2200 (also names the output sub-folder)
random_seed = 1990 # Seed for NumPy's global random generator; to reproduce the results, use 1990

####################################################################################
####################################################################################
############# Create Output Path, Load the data and Conversion Model ###############
####################################################################################
####################################################################################

##### Load Data and Model
# Environment data and the fitted conversion model are both read from disk.
# NOTE(review): joblib's load() unpickles the file, so only trusted model files
# should ever be placed under PATH_MODELS.
dt_init = pd.read_parquet(f"{PATH_DATA}/dt_env.parq")
model_conversion = load(f"{PATH_MODELS}/conversion_model.pkl")
size_init = len(dt_init)  # number of rows in the environment data

##### Calculate the scaling factor
# The budget is given for a data set of `size_norm` rows; rescale it to the
# actual number of rows so that the constraints below apply to the full data.
factor_scale = size_init / size_norm
budget_scale = factor_scale * budget

##### Create the folder for the output model
# exist_ok=True makes this a no-op when the folder is already there and avoids
# the check-then-create race of testing os.path.isdir() first.
os.makedirs(f"{PATH_MODELS}/budget_{budget}", exist_ok=True)


####################################################################################
####################################################################################
############### Prepare the data and parameters for the bandits ####################
####################################################################################
####################################################################################

# Seed NumPy's global random generator so that repeated runs are reproducible
np.random.seed(random_seed)

##### Parameters of General Bandits
# Context variables that are one-hot encoded for the conversion model
var_model = [
    "RISK_SCORE",
    "EDUCATION",
    "MARRIAGE",
    "AMOUNT_CLUSTER",
    "AGE_CLUSTER",
]
# Feature columns, in the order recorded on the fitted conversion model
var_model_onehot = [feature for feature in model_conversion.feature_names_in_]
# Available actions: -1 is handled as a special case (all outcomes set to 0),
# every other value is a discount in percent
list_actions = [-1, 10, 20, 35, 55, 80]

####################################################################################
####################################################################################
######################### Get Optimal Static Policy ################################
####################################################################################
####################################################################################

##### Setup the optimization environment
# Working copy of the data that receives one set of outcome columns per action,
# plus a one-hot encoded frame used as input of the conversion model.
dt_optim = dt_init.copy(deep=True)
dt_optim_onehot = pd.get_dummies(dt_init[[*var_model, "interest_rate"]], prefix_sep="_zl_")
# Keep the undiscounted rate: "interest_rate" is overwritten for every action.
dt_optim_onehot["interest_rate_save"] = dt_optim_onehot["interest_rate"]

for i_ in list_actions:
    if i_ != -1:
        # Regular action: i_ is a discount in percent applied to the interest rate
        discount_ = i_ / 100
        dt_optim_onehot["interest_rate"] = (1 - discount_) * dt_optim_onehot["interest_rate_save"]
        # Probability of the positive class under the discounted rate
        dt_optim[f"conversion_{i_}"] = model_conversion.predict_proba(dt_optim_onehot[var_model_onehot])[:, 1]
        conversion_ = dt_optim[f"conversion_{i_}"]
        conversion_discount_ = conversion_ * discount_
        dt_optim[f"volume_{i_}"] = conversion_ * dt_optim["amount_norm"]
        dt_optim[f"discount_amount_{i_}"] = conversion_discount_ * dt_optim["discount_base_norm"]
        # NOTE(review): the divisor 7 is not explained anywhere in this file - confirm its meaning
        dt_optim[f"discount_sum_{i_}"] = conversion_discount_ / 7
    else:
        # Action -1: every outcome column is set to zero
        dt_optim[f"conversion_{i_}"] = 0
        dt_optim[f"volume_{i_}"] = 0
        dt_optim[f"discount_amount_{i_}"] = 0
        dt_optim[f"discount_sum_{i_}"] = 0

# Restore the original interest rate and drop the temporary backup column
dt_optim_onehot["interest_rate"] = dt_optim_onehot["interest_rate_save"]
dt_optim_onehot = dt_optim_onehot.drop("interest_rate_save", axis=1)

# All outcome columns, selected by name fragment in their current column order
var_target = [
    column
    for column in dt_optim.columns
    if any(tag in column for tag in ("volume_", "discount_amount_", "discount_sum_", "conversion_"))
]

# Aggregate to one row per context: number of observations and summed outcomes
dt_optim["count"] = 1
dt_optim = dt_optim.groupby("index_context")[["count", *var_target]].sum().reset_index()

# One matrix per outcome type: rows follow the rows of dt_optim, columns follow
# the order of list_actions. Mapping of names to column prefixes:
#   conversion_matrix  <- conversion_*
#   target_matrix      <- volume_*
#   constraint2_matrix <- discount_amount_*
#   constraint1_matrix <- discount_sum_*
conversion_matrix, target_matrix, constraint2_matrix, constraint1_matrix = (
    np.array(dt_optim[[f"{prefix}_{action}" for action in list_actions]])
    for prefix in ("conversion", "volume", "discount_amount", "discount_sum")
)

##### Run optimization with CVX

# Decision variable: one weight per (row of target_matrix, action). The
# constraints below force every row to be non-negative and to sum to 1.
action_matrix = cvx.Variable(shape=(target_matrix.shape))
# Objective: total of target_matrix weighted by the chosen action weights
obj_func = cvx.sum(cvx.multiply(action_matrix, target_matrix))

# Both weighted cost totals must stay within the scaled budget.
# NOTE(review): the names are crossed - `constraint2` is built from
# constraint1_matrix and `constraint1` from constraint2_matrix. Both share the
# same bound, so the feasible set is unaffected; only the labels are confusing.
constraint2 = cvx.sum(cvx.multiply(action_matrix, constraint1_matrix)) <= budget_scale
constraint1 = cvx.sum(cvx.multiply(action_matrix, constraint2_matrix)) <= budget_scale

constraint_non_negative = action_matrix >= 0
constraint_sum_1 = cvx.sum(action_matrix, axis=1) == 1

constraints = [constraint_non_negative, constraint_sum_1, constraint2, constraint1]

obj = cvx.Maximize(obj_func)

prob = cvx.Problem(obj, constraints)

# Try the default solver first and fall back to SCS when it fails. Only solver
# failures trigger the fallback, so Ctrl-C and programming errors are no longer
# swallowed by a bare `except`.
try:
    prob.solve(verbose = True)
except cvx.SolverError:
    prob.solve(verbose = True, solver = "SCS")

# Without a solution the `.value` attributes are None and the code below would
# fail with an unhelpful TypeError, so stop here with an explicit message.
if prob.status not in ("optimal", "optimal_inaccurate"):
    raise RuntimeError(f"Static policy optimization failed with status '{prob.status}'")

# Results are scaled back from the full data set to `size_norm` rows
print(f"Expected Reward: {obj_func.value * (1/factor_scale)}")
print(f"Expected Cost2: {cvx.sum(cvx.multiply(action_matrix, constraint2_matrix)).value * (1/factor_scale)}")
print(f"Expected Cost1: {cvx.sum(cvx.multiply(action_matrix, constraint1_matrix)).value * (1/factor_scale)}")

####################################################################################
####################################################################################
######################### Export Optimal Static Policy #############################
####################################################################################
####################################################################################

# Position of each action in the matrices -> action value
dict_mapping_actions = dict(enumerate(list_actions))

# Optimal policy as a table: one column per action plus the context identifier
policy_optim = pd.DataFrame(deepcopy(action_matrix.value), columns=list(list_actions))
policy_optim["index_context"] = dt_optim["index_context"]


def _scaled_value(expression):
    """Return the value of a CVX expression scaled back by 1/factor_scale."""
    return expression.value * (1/factor_scale)


# NOTE(review): "expect_cost1" is computed from constraint2_matrix and
# "expect_cost2" from constraint1_matrix, which is the opposite of the
# "Expected Cost1"/"Expected Cost2" labels printed after solving - confirm
# which labelling the readers of this file expect.
dict_expect_optim_reward_costs = {
    "expect_reward": _scaled_value(obj_func),
    "expect_cost1": _scaled_value(cvx.sum(cvx.multiply(action_matrix, constraint2_matrix))),
    "expect_cost2": _scaled_value(cvx.sum(cvx.multiply(action_matrix, constraint1_matrix))),
}

# Persist the policy and its expected reward/costs in the budget-specific folder
dump(policy_optim, f"{PATH_MODELS}/budget_{budget}/policy_optim_static.pkl")
dump(dict_expect_optim_reward_costs, f"{PATH_MODELS}/budget_{budget}/dict_expect_optim_reward_costs.pkl")